
import codecs
import sys
import glob
import re
import pepe.p_txt.bigtxt as bt

# Patterns are compiled once, outside the per-line loop.  Raw strings keep
# regex escapes such as the escaped pipe from being read as (invalid) Python
# string escapes.
_LINE_END_QUOTE = re.compile(r' *" *\r\n')
_LEADING_QUOTE = re.compile(r'^"')
# After UTF-8 decoding the byte order mark is the single character U+FEFF,
# not the three raw bytes EF BB BF.  The legacy three-character form is still
# accepted so input that matched the old pattern keeps being handled.
_BOM_QUOTE = re.compile(u'(\ufeff|\xef\xbb\xbf)"')
_QUOTED_DELIMITER = re.compile(r' *"\|" *')
_SPACE_RUN = re.compile(r' {2,}')


def clean_line(line):
    """Return *line* with text qualifiers and padding spaces removed.

    The line is expected to be already decoded text that still carries its
    CR LF terminator.  The steps, in order:

    1. a closing quote (with surrounding spaces) before CR LF becomes ``|``;
    2. a quote at the very start of the line is dropped;
    3. a quote directly after a byte order mark is dropped, the mark is kept;
    4. ``"|"`` (with surrounding spaces) collapses to a bare ``|``;
    5. every run of two or more spaces is removed entirely.
    """
    line = _LINE_END_QUOTE.sub('|\r\n', line)
    line = _LEADING_QUOTE.sub('', line)
    line = _BOM_QUOTE.sub(r'\1', line)
    line = _QUOTED_DELIMITER.sub('|', line)
    # '{2,}' rather than '{2}': removing spaces strictly in pairs would leave
    # one stray space behind for every odd-length run.
    return _SPACE_RUN.sub('', line)


def convert_file(path):
    """Write a cleaned copy of the UTF-8 file *path* to ``path + '_modify.#'``.

    Both files are opened in binary mode through ``codecs`` so that CR LF
    line endings pass through untranslated.  The input is streamed line by
    line instead of being loaded into memory at once, and both handles are
    closed even when decoding or writing fails.
    """
    with codecs.open(path, 'rb', 'utf-8') as source:
        with codecs.open(path + '_modify.#', 'wb', 'utf-8') as target:
            for line in source:
                target.write(clean_line(line))


# Convert every .dat file found in the current working directory.
for f in glob.glob('*.dat'):
    convert_file(f)

